***MAKE ANALYSIS SAMPLE 1***
* Loads the extended core dataset, restricts it to ages 19-60 and prints a
* summary of the number of observations per year.

*Load data*
clear
use "Data\Core_datasets\sample_04_thesis_extended.dta"

*Initial steps*
compress, nocoalesce
sort pnr aar

*Age restrictions*
* age is a copy of alder; destring converts it to numeric if it is stored as text
gen age=alder
destring age, replace
keep if inrange(age, 19, 60)

* Diagnostic only: count the rows in every year, keep that count on the row(s)
* of the first pnr within the year, and summarize the counts across years.
* NOTE(review): first() is not an official egen function; presumably it comes
* from the community-contributed egenmore package - confirm it is installed.
gen individual=1
bysort aar: egen indi_year=total(individual)
by aar: egen first_pnr=first(pnr)
gen indi_per_year=indi_year if pnr==first_pnr
summarize indi_per_year

* The helper variables are only needed for the printout above
drop individual indi_year first_pnr indi_per_year

* Restore the panel order that the later by pnr: commands rely on
sort pnr aar
***

*Deflate by yearly effects
* Regress the log wage on year indicators (base year 2000) and the
* educ_eika-by-erhverv_akk_new interaction, then remove the estimated year
* effect from every log wage.

gen log_wage=log(job_time_loen_smal)
regress log_wage ib2000.aar i.educ_eika#c.erhverv_akk_new

* The base year 2000 has a year effect of zero
gen year_effects=0 if aar==2000

* For every other year from 1980 to 2018, copy that year's coefficient onto
* the observations of that year. Years outside this range stay missing.
forvalues yr=1980/2018 {
    if `yr'!=2000 {
        replace year_effects=_b[`yr'.aar] if aar==`yr'
    }
}

*Subtract year effects
gen log_wage_new=log_wage-year_effects

*Make variables for final education
* For every person a single reference age is chosen: the lowest observed age
* of 35 or more if the person is ever observed at 35+, otherwise the highest
* observed age. Program (hfaudd), start year (hf_vfra) and institution group
* are read off at that age and spread to all rows of the person.
* Assumes the data are still sorted by pnr (needed for by pnr:).

gen temp_35=1 if age>=35
by pnr: egen ever_35=max(temp_35)

by pnr: egen min_age=min(age) if temp_35==1
by pnr: egen max_age=max(age) if ever_35!=1

* Condition that picks the reference row(s) of each person
local refrow "(ever_35==1 & min_age==age) | (ever_35!=1 & max_age==age)"

gen temp=hfaudd if `refrow'
gen temp2=year(hf_vfra) if `refrow'

* separate tertiary degrees by university, group institutions for all other degree programs
destring hfinstnr, replace

* relinst: 0 for any non-missing institution number that is not listed below,
* 1-8 for the listed institution numbers, missing if hfinstnr is missing
gen relinst=0 if hfinstnr!=.

replace relinst=1 if inlist(hfinstnr, 851446, 851416, 561408, 281279, 151413, 280776)

replace relinst=2 if inlist(hfinstnr, 101441, 101443, 101455, 147410, 101582, 280790, 281575, ///
    281448, 281572, 281573, 281574, 281887, 281959)

replace relinst=3 if inlist(hfinstnr, 751431, 751465, 281886, 751422, 751418, 657410, 101535, 280780)

replace relinst=4 if inlist(hfinstnr, 330401, 561411, 537406, 621406, 461416, 461437, 461450)

replace relinst=5 if inlist(hfinstnr, 265407, 280786, 280781)

replace relinst=6 if hfinstnr==173405

replace relinst=7 if hfinstnr==147406

replace relinst=8 if hfinstnr==101530

gen temp3=relinst if `refrow'

* Diagnostic: which institution numbers ended up in which group on the reference rows
tabulate hfinstnr relinst if temp3!=. & relinst>0

* Spread the reference-row values to every row of the person
by pnr: egen final_prog=max(temp)
by pnr: egen final_inst=max(temp3)
* Combined code: institution group in the ten-thousands, program code below it
gen final_educ=10000*final_inst+final_prog
*egen final_educ = group(final_prog final_inst)
by pnr: egen final_educ_year=max(temp2)

drop temp_35 ever_35 min_age max_age temp temp2 temp3


**Calculate peer wage growth**
* Individual-level ingredients: the mean deflated log wage in the first five
* years from the final-education year, the mean in years 9-11, the implied
* individual growth rate, and a flag for growth at or above the 99th percentile.
* Assumes the data are sorted by pnr (needed for by pnr:).

*Mark first five years and year ten (for end of growth period - also year 9 and 11)
* year1 is only defined for final-education years from 1980 onwards
gen year1=final_educ_year if final_educ_year>=1980 & final_educ_year!=.
forvalues k=2/5 {
    local offset=`k'-1
    gen year`k'=year1+`offset' if year1!=.
}

foreach k in 9 10 11 {
    local offset=`k'-1
    gen year`k'=year1+`offset' if year1!=.
}

*define variable for average wage in first 5 years after hfaudd
by pnr: egen temp=mean(log_wage_new) if inlist(aar, year1, year2, year3, year4, year5)
by pnr: egen start_wage_ambition=max(temp)

*define variable for average wage 10-ish years after hfaudd
by pnr: egen temp2=mean(log_wage_new) if inlist(aar, year9, year10, year11)
by pnr: egen ten_wage_ambition=max(temp2)

drop temp temp2

* individual==1 tags the first observed year of each person (one row per person)
by pnr: egen first_year=min(aar)
gen individual=1 if aar==first_year
drop first_year

*Mark 'old' individuals*
* old_ambition is 1 when both the start wage and the ten-year wage exist,
* 0 for everyone else with a defined year1, and missing otherwise
gen old_ambition=0 if year1!=.
replace old_ambition=1 if start_wage_ambition!=. & ten_wage_ambition!=.

*mark extreme individual growth
gen individual_wage_growth_ambition=exp(ten_wage_ambition-start_wage_ambition)-1 if old_ambition==1
summarize individual_wage_growth_ambition if individual==1, detail
scalar outliers=r(p99)
*mark outliers at or above the 99th percentile
* scalar() forces the stored scalar to be used: a bare name in an expression is
* matched against variable names (including abbreviations) before scalars.
gen extreme_ambition=0 if year1!=.
replace extreme_ambition=1 if individual_wage_growth_ambition>=scalar(outliers) & individual_wage_growth_ambition!=.


*Merge regions on*
* keep(master match): by default merge also keeps observations that exist only
* in the using file. Those rows would re-introduce person-years that are not
* part of this sample (every analysis variable missing) and would change the
* first observed year computed below. nogenerate replaces the drop of _merge.
merge 1:1 pnr aar using "Data\regions.dta", keep(master match) nogenerate

* Make the panel order explicit for the by pnr: commands that follow
sort pnr aar

*Get regional code when graduating - in order to split up the 9th and 10th graders*
* Graduation year: the final-education year if it is 1980 or later,
* otherwise the first year in which the person is observed
gen grad_year=final_educ_year if final_educ_year>=1980
by pnr: egen first_year=min(aar)
replace grad_year=first_year if grad_year==.

* Region observed in the graduation year (most frequent value; ties resolved by maxmode)
gen region_temp=region if aar==grad_year
by pnr: egen region_temp2=mode(region_temp), maxmode

* Fallback: no region found in the graduation year, so use the first observed year
replace region_temp=region if region_temp2=="" & aar==first_year

by pnr: egen grad_region=mode(region_temp), maxmode

*Change the codes of 9th and 10th graders*
* grad_region is text up to here; make it numeric so it can be compared below
destring grad_region, replace

* Append the graduation region (81-85) to the codes 1109 and 1110,
* e.g. 1109 in region 81 becomes 110981
forvalues reg=81/85 {
    foreach code in 1109 1110 {
        replace final_educ=`code'`reg' if final_educ==`code' & grad_region==`reg'
    }
}


*Group by final_educ*
* Peer statistics per final_educ group. A person contributes once (the row
* with individual==1) and only if old_ambition==1 and extreme_ambition==0;
* the groups final_educ==. and final_educ==1 are left out.
local peers "final_educ!=. & final_educ!=1 & individual==1 & old_ambition==1 & extreme_ambition==0"

sort final_educ

*get group variables
* Each statistic is computed on the contributing rows only and then spread
* to every row of the group with max().

* Sum of the ten-year wages (a sum of exactly zero is treated as missing)
by final_educ: egen temp=total(ten_wage_ambition) if `peers'
replace temp=. if temp==0
by final_educ: egen wage10_ambition_temp=max(temp)
drop temp

* Number of contributing ten-year wages
by final_educ: egen temp=count(ten_wage_ambition) if `peers'
by final_educ: egen n_wage10_ambition_group=max(temp)
drop temp

* Sum of the start wages (a sum of exactly zero is treated as missing)
by final_educ: egen temp=total(start_wage_ambition) if `peers'
replace temp=. if temp==0
by final_educ: egen wage_start_ambition_temp=max(temp)
drop temp

* Number of contributing start wages
by final_educ: egen temp=count(start_wage_ambition) if `peers'
by final_educ: egen n_wage_start_ambition_group=max(temp)
drop temp

*get individual variables
sort pnr aar

* Copies of the group sums and counts under their final names
gen wage10_ambition=wage10_ambition_temp
gen wage_start_ambition=wage_start_ambition_temp
gen n_wage10_individual_ambition=n_wage10_ambition_group
gen n_wage_start_individual_ambition=n_wage_start_ambition_group

* Group means = sum / count, defined only where the count is positive
gen wage10_mean_ambition=wage10_ambition/n_wage10_individual_ambition if n_wage10_individual_ambition>0
gen wage_start_mean_ambition=wage_start_ambition/n_wage_start_individual_ambition if n_wage_start_individual_ambition>0

*Compute wage growth
* The means are in logs, so exp(difference)-1 is the growth rate
gen wage_growth_ambition=exp(wage10_mean_ambition-wage_start_mean_ambition)-1

**Save**
save "Data\sample1_new_extended.dta", replace
